In [1]:
# %matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode, iplot
In [2]:
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and the
# old names were removed afterwards, so use the new name when this install has it
# and fall back to the historical name on older installs.
_white_style = 'seaborn-v0_8-white' if 'seaborn-v0_8-white' in plt.style.available else 'seaborn-white'
plt.style.use(_white_style)
In [3]:
# Read the titles CSV into the working frame; the path is relative to the
# directory the notebook is run from.
dt = pd.read_csv(filepath_or_buffer='./netflix-shows/netflix_titles.csv')
In [4]:
# Display the loaded frame to inspect its columns and a sample of its rows.
dt
Out[4]:
show_id type title director cast country date_added release_year rating duration listed_in description
0 81145628 Movie Norm of the North: King Sized Adventure Richard Finn, Tim Maltby Alan Marriott, Andrew Toth, Brian Dobson, Cole... United States, India, South Korea, China September 9, 2019 2019 TV-PG 90 min Children & Family Movies, Comedies Before planning an awesome wedding for his gra...
1 80117401 Movie Jandino: Whatever it Takes NaN Jandino Asporaat United Kingdom September 9, 2016 2016 TV-MA 94 min Stand-Up Comedy Jandino Asporaat riffs on the challenges of ra...
2 70234439 TV Show Transformers Prime NaN Peter Cullen, Sumalee Montano, Frank Welker, J... United States September 8, 2018 2013 TV-Y7-FV 1 Season Kids' TV With the help of three human allies, the Autob...
3 80058654 TV Show Transformers: Robots in Disguise NaN Will Friedle, Darren Criss, Constance Zimmer, ... United States September 8, 2018 2016 TV-Y7 1 Season Kids' TV When a prison ship crash unleashes hundreds of...
4 80125979 Movie #realityhigh Fernando Lebrija Nesta Cooper, Kate Walsh, John Michael Higgins... United States September 8, 2017 2017 TV-14 99 min Comedies When nerdy high schooler Dani finally attracts...
5 80163890 TV Show Apaches NaN Alberto Ammann, Eloy Azorín, Verónica Echegui,... Spain September 8, 2017 2016 TV-MA 1 Season Crime TV Shows, International TV Shows, Spanis... A young journalist is forced into a life of cr...
6 70304989 Movie Automata Gabe Ibáñez Antonio Banderas, Dylan McDermott, Melanie Gri... Bulgaria, United States, Spain, Canada September 8, 2017 2014 R 110 min International Movies, Sci-Fi & Fantasy, Thrillers In a dystopian future, an insurance adjuster f...
7 80164077 Movie Fabrizio Copano: Solo pienso en mi Rodrigo Toro, Francisco Schultz Fabrizio Copano Chile September 8, 2017 2017 TV-MA 60 min Stand-Up Comedy Fabrizio Copano takes audience participation t...
8 80117902 TV Show Fire Chasers NaN NaN United States September 8, 2017 2017 TV-MA 1 Season Docuseries, Science & Nature TV As California's 2016 fire season rages, brave ...
9 70304990 Movie Good People Henrik Ruben Genz James Franco, Kate Hudson, Tom Wilkinson, Omar... United States, United Kingdom, Denmark, Sweden September 8, 2017 2014 R 90 min Action & Adventure, Thrillers A struggling couple can't believe their luck w...
10 80169755 Movie Joaquín Reyes: Una y no más José Miguel Contreras Joaquín Reyes NaN September 8, 2017 2017 TV-MA 78 min Stand-Up Comedy Comedian and celebrity impersonator Joaquín Re...
11 70299204 Movie Kidnapping Mr. Heineken Daniel Alfredson Jim Sturgess, Sam Worthington, Ryan Kwanten, A... Netherlands, Belgium, United Kingdom, United S... September 8, 2017 2015 R 95 min Action & Adventure, Dramas, International Movies When beer magnate Alfred "Freddy" Heineken is ...
12 80182480 Movie Krish Trish and Baltiboy NaN Damandeep Singh Baggan, Smita Malhotra, Baba S... NaN September 8, 2017 2009 TV-Y7 58 min Children & Family Movies A team of minstrels, including a monkey, cat a...
13 80182483 Movie Krish Trish and Baltiboy: Battle of Wits Munjal Shroff, Tilak Shetty Damandeep Singh Baggan, Smita Malhotra, Baba S... NaN September 8, 2017 2013 TV-Y7 62 min Children & Family Movies An artisan is cheated of his payment, a lion o...
14 80182596 Movie Krish Trish and Baltiboy: Best Friends Forever Munjal Shroff, Tilak Shetty Damandeep Singh Baggan, Smita Malhotra, Deepak... NaN September 8, 2017 2016 TV-Y 65 min Children & Family Movies A cat, monkey and donkey team up to narrate fo...
15 80182482 Movie Krish Trish and Baltiboy: Comics of India Tilak Shetty Damandeep Singh Baggan, Smita Malhotra, Baba S... NaN September 8, 2017 2012 TV-Y7 61 min Children & Family Movies In three comic-strip-style tales, a boy tries ...
16 80182597 Movie Krish Trish and Baltiboy: Oversmartness Never ... Tilak Shetty Rishi Gambhir, Smita Malhotra, Deepak Chachra NaN September 8, 2017 2017 TV-Y7 65 min Children & Family Movies A cat, monkey and donkey learn the consequence...
17 80182481 Movie Krish Trish and Baltiboy: Part II NaN Damandeep Singh Baggan, Smita Malhotra, Baba S... NaN September 8, 2017 2010 TV-Y7 58 min Children & Family Movies Animal minstrels narrate stories about a monke...
18 80182621 Movie Krish Trish and Baltiboy: The Greatest Trick Munjal Shroff, Tilak Shetty Damandeep Singh Baggan, Smita Malhotra, Baba S... NaN September 8, 2017 2013 TV-Y7 60 min Children & Family Movies The consequences of trickery are explored in s...
19 80057969 Movie Love Gaspar Noé Karl Glusman, Klara Kristin, Aomi Muyock, Ugo ... France, Belgium September 8, 2017 2015 NR 135 min Cult Movies, Dramas, Independent Movies A man in an unsatisfying marriage recalls the ...
20 80060297 Movie Manhattan Romance Tom O'Brien Tom O'Brien, Katherine Waterston, Caitlin Fitz... United States September 8, 2017 2014 TV-14 98 min Comedies, Independent Movies, Romantic Movies A filmmaker working on a documentary about lov...
21 80046728 Movie Moonwalkers Antoine Bardou-Jacquet Ron Perlman, Rupert Grint, Robert Sheehan, Ste... France, Belgium September 8, 2017 2015 R 96 min Action & Adventure, Comedies, International Mo... A brain-addled war vet, a failing band manager...
22 80046727 Movie Rolling Papers Mitch Dickman NaN United States, Uruguay September 8, 2017 2015 TV-MA 79 min Documentaries As the newspaper industry takes a hit, The Den...
23 70304988 Movie Stonehearst Asylum Brad Anderson Kate Beckinsale, Jim Sturgess, David Thewlis, ... United States September 8, 2017 2014 PG-13 113 min Horror Movies, Thrillers In 1899, a young doctor arrives at an asylum f...
24 80057700 Movie The Runner Austin Stark Nicolas Cage, Sarah Paulson, Connie Nielsen, W... United States September 8, 2017 2015 R 90 min Dramas, Independent Movies A New Orleans politician finds his idealistic ...
25 80045922 Movie 6 Years Hannah Fidell Taissa Farmiga, Ben Rosenfield, Lindsay Burdge... United States September 8, 2015 2015 NR 80 min Dramas, Independent Movies, Romantic Movies As a volatile young couple who have been toget...
26 80244601 TV Show Castle of Stars NaN Chaiyapol Pupart, Jintanutda Lummakanon, Worra... NaN September 7, 2018 2015 TV-14 1 Season International TV Shows, Romantic TV Shows, TV ... As four couples with different lifestyles go t...
27 80203094 Movie City of Joy Madeleine Gavin NaN United States, September 7, 2018 2018 TV-MA 77 min Documentaries Women who've been sexually brutalized in war-t...
28 80190843 TV Show First and Last NaN NaN NaN September 7, 2018 2018 TV-MA 1 Season Docuseries Take an intimate look at the emotionally charg...
29 70241607 Movie Laddaland Sopon Sukdapisit Saharat Sangkapreecha, Pok Piyatida Woramusik,... Thailand September 7, 2018 2011 TV-MA 112 min Horror Movies, International Movies When a family moves into an upscale housing de...
... ... ... ... ... ... ... ... ... ... ... ... ...
6204 80091341 TV Show Cuckoo NaN Andy Samberg, Taylor Lautner, Greg Davies, Hel... United Kingdom April 19, 2019 2019 TV-14 5 Seasons British TV Shows, International TV Shows, TV C... Rachel shocks her proper British parents when ...
6205 80036747 TV Show Pororo - The Little Penguin NaN NaN South Korea April 19, 2019 2013 TV-Y 2 Seasons Kids' TV, Korean TV Shows On a tiny island, Pororo the penguin has fun a...
6206 80173174 TV Show Samantha! NaN Emmanuelle Araújo, Douglas Silva, Sabrina Nona... Brazil April 19, 2019 2019 TV-MA 2 Seasons International TV Shows, TV Comedies A child star in the '80s, Samantha clings to t...
6207 80190407 TV Show Murderous Affairs NaN NaN United States April 17, 2018 2017 TV-14 3 Seasons Crime TV Shows, Docuseries Mixing interviews with dramatic re-enactments,...
6208 70227189 TV Show Lost Girl NaN Anna Silk, Kris Holden-Ried, Ksenia Solo, Rich... Canada April 17, 2016 2015 TV-14 5 Seasons TV Dramas, TV Horror, TV Mysteries Discovering she's a succubus who sustains hers...
6209 70264078 TV Show Mr. Young NaN Brendan Meyer, Matreya Fedor, Gig Morton, Kurt... Canada April 16, 2019 2013 TV-G 2 Seasons Kids' TV, TV Comedies After Adam graduates from college at age 14, h...
6210 80239700 TV Show Psiconautas NaN Guillermo Toledo, Gabriel Goity, Florencia Peñ... Argentina April 15, 2018 2016 TV-MA 2 Seasons International TV Shows, Spanish-Language TV Sh... A Spanish con man masquerades as a therapist a...
6211 80231523 TV Show The Minimighty Kids NaN NaN France April 15, 2018 2012 TV-G 2 Seasons Kids' TV, TV Comedies Some have big feet or a sniffly nose, others a...
6212 80126877 TV Show Filinta NaN Onur Tuna, Serhat Tutumluer, Mehmet Özgür, Naz... Turkey April 15, 2017 2015 TV-14 2 Seasons Crime TV Shows, International TV Shows, TV Act... In 19th-century Istanbul, a young police offic...
6213 80126599 TV Show Leyla and Mecnun Onur Ünlü Ali Atay, Melis Birkan, Serkan Keskin, Ahmet M... Turkey April 15, 2017 2014 TV-PG 3 Seasons International TV Shows, Romantic TV Shows, TV ... Destiny brings Mecnun and Leyla together as ne...
6214 80049872 TV Show Chelsea NaN NaN United States April 14, 2017 2017 TV-MA 2 Seasons Stand-Up Comedy & Talk Shows, TV Comedies It's not her first talk show, but it is a firs...
6215 80066227 TV Show Crazy Ex-Girlfriend NaN Rachel Bloom, Vincent Rodriguez III, Santino F... United States April 13, 2019 2019 TV-14 4 Seasons Romantic TV Shows, TV Comedies Still pining for Josh, the boy who dumped her ...
6216 80108373 TV Show The Magic School Bus Rides Again NaN Kate McKinnon, Miles Koseleci-Vieira, Mikaela ... United States April 13, 2018 2018 TV-Y 2 Seasons Kids' TV Ms. Frizzle's kid sister Fiona takes the wheel...
6217 70196145 TV Show New Girl NaN Zooey Deschanel, Jake Johnson, Max Greenfield,... United States April 11, 2019 2017 TV-14 7 Seasons Romantic TV Shows, TV Comedies Still rebounding from a breakup, Jessica Day m...
6218 80162994 TV Show Talking Tom and Friends NaN Colin Hanks, Tom Kenny, James Adomian, Lisa Sc... Cyprus, Austria, Thailand April 10, 2019 2017 TV-G 2 Seasons Kids' TV, TV Comedies Full of funny one-liners and always ready for ...
6219 80186475 TV Show Pokémon the Series NaN Sarah Natochenny, Laurie Hymes, Jessica Paquet... Japan April 1, 2019 2019 TV-Y7-FV 2 Seasons Anime Series, Kids' TV Ash and his Pikachu travel to the Alola region...
6220 70272742 TV Show Justin Time NaN Gage Munroe, Scott McCord, Jenna Warren Canada April 1, 2016 2012 TV-Y 2 Seasons Kids' TV In Justin's dreams, he and his imaginary frien...
6221 80067942 TV Show Terrace House: Boys & Girls in the City NaN You, Reina Triendl, Ryota Yamasato, Yoshimi To... Japan April 1, 2016 2016 TV-14 2 Seasons International TV Shows, Reality TV A new set of six men and women start their liv...
6222 70136122 TV Show Weeds NaN Mary-Louise Parker, Hunter Parrish, Alexander ... United States April 1, 2014 2012 TV-MA 8 Seasons TV Comedies, TV Dramas A suburban mother starts selling marijuana to ...
6223 70204989 TV Show Gunslinger Girl NaN Yuuka Nanri, Kanako Mitsuhashi, Eri Sendai, Am... Japan NaN 2008 TV-14 2 Seasons Anime Series, Crime TV Shows On the surface, the Social Welfare Agency appe...
6224 70304979 TV Show Anthony Bourdain: Parts Unknown NaN Anthony Bourdain United States NaN 2018 TV-PG 5 Seasons Docuseries This CNN original series has chef Anthony Bour...
6225 70153412 TV Show Frasier NaN Kelsey Grammer, Jane Leeves, David Hyde Pierce... United States NaN 2003 TV-PG 11 Seasons Classic & Cult TV, TV Comedies Frasier Crane is a snooty but lovable Seattle ...
6226 70243132 TV Show La Familia P. Luche NaN Eugenio Derbez, Consuelo Duval, Luis Manuel Áv... United States NaN 2012 TV-14 3 Seasons International TV Shows, Spanish-Language TV Sh... This irreverent sitcom featues Ludovico, Feder...
6227 80005756 TV Show The Adventures of Figaro Pho NaN Luke Jurevicius, Craig Behenna, Charlotte Haml... Australia NaN 2015 TV-Y7 2 Seasons Kids' TV, TV Comedies Imagine your worst fears, then multiply them: ...
6228 80159925 TV Show Kikoriki NaN Igor Dmitriev NaN NaN 2010 TV-Y 2 Seasons Kids' TV A wacky rabbit and his gang of animal pals hav...
6229 80000063 TV Show Red vs. Blue NaN Burnie Burns, Jason Saldaña, Gustavo Sorola, G... United States NaN 2015 NR 13 Seasons TV Action & Adventure, TV Comedies, TV Sci-Fi ... This parody of first-person shooter games, mil...
6230 70286564 TV Show Maron NaN Marc Maron, Judd Hirsch, Josh Brener, Nora Zeh... United States NaN 2016 TV-MA 4 Seasons TV Comedies Marc Maron stars as Marc Maron, who interviews...
6231 80116008 Movie Little Baby Bum: Nursery Rhyme Friends NaN NaN NaN NaN 2016 NaN 60 min Movies Nursery rhymes and original music for children...
6232 70281022 TV Show A Young Doctor's Notebook and Other Stories NaN Daniel Radcliffe, Jon Hamm, Adam Godley, Chris... United Kingdom NaN 2013 TV-MA 2 Seasons British TV Shows, TV Comedies, TV Dramas Set during the Russian Revolution, this comic ...
6233 70153404 TV Show Friends NaN Jennifer Aniston, Courteney Cox, Lisa Kudrow, ... United States NaN 2003 TV-14 10 Seasons Classic & Cult TV, TV Comedies This hit sitcom follows the merry misadventure...

6234 rows × 12 columns

In [5]:
# Add calendar features: parse the "date_added" text once, then derive the year
# and month each title was added from the parsed values.
added_on = pd.to_datetime(dt['date_added'])
dt['date_added'] = added_on
dt['year_added'] = added_on.dt.year
dt['month_added'] = added_on.dt.month
In [6]:
# Split the raw "duration" text (e.g. "90 min" / "2 Seasons") into two columns:
#   season_count - leading number for entries that mention "Season", else ""
#   duration     - leading number for every other entry, else ""
# The guard makes the cell safe to re-run: once "duration" has been overwritten
# the word "Season" is gone, so a second pass would blank out season_count.
if 'season_count' not in dt.columns:
    duration_text = dt['duration'].fillna("").astype(str)  # missing durations become ""
    mentions_season = duration_text.str.contains("Season", regex=False)
    leading_token = duration_text.str.split(" ").str[0]
    dt['season_count'] = leading_token.where(mentions_season, "")
    dt['duration'] = leading_token.where(~mentions_season, "")
In [7]:
# Check the derived columns (year_added, month_added, season_count) on the first rows.
dt.head()
Out[7]:
show_id type title director cast country date_added release_year rating duration listed_in description year_added month_added season_count
0 81145628 Movie Norm of the North: King Sized Adventure Richard Finn, Tim Maltby Alan Marriott, Andrew Toth, Brian Dobson, Cole... United States, India, South Korea, China 2019-09-09 2019 TV-PG 90 Children & Family Movies, Comedies Before planning an awesome wedding for his gra... 2019.0 9.0
1 80117401 Movie Jandino: Whatever it Takes NaN Jandino Asporaat United Kingdom 2016-09-09 2016 TV-MA 94 Stand-Up Comedy Jandino Asporaat riffs on the challenges of ra... 2016.0 9.0
2 70234439 TV Show Transformers Prime NaN Peter Cullen, Sumalee Montano, Frank Welker, J... United States 2018-09-08 2013 TV-Y7-FV Kids' TV With the help of three human allies, the Autob... 2018.0 9.0 1
3 80058654 TV Show Transformers: Robots in Disguise NaN Will Friedle, Darren Criss, Constance Zimmer, ... United States 2018-09-08 2016 TV-Y7 Kids' TV When a prison ship crash unleashes hundreds of... 2018.0 9.0 1
4 80125979 Movie #realityhigh Fernando Lebrija Nesta Cooper, Kate Walsh, John Michael Higgins... United States 2017-09-08 2017 TV-14 99 Comedies When nerdy high schooler Dani finally attracts... 2017.0 9.0
In [8]:
# Tally titles per content type.
# Naming the index before reset_index(name=...) gives the same two columns
# (<col>, "Count") on every pandas version; plain value_counts().reset_index()
# yields "index"/<col> on pandas 1.x but <col>/"count" on pandas 2.x.
col = "type"
grouped = dt[col].value_counts().rename_axis(col).reset_index(name="Count")
In [9]:
# Show the per-type totals computed above.
grouped.head()
Out[9]:
type Count
0 Movie 4265
1 TV Show 1969
In [10]:
# Share of each content type as a pie chart.
fig = plt.figure(figsize=(5,5))
# Pull the first wedge in flush and offset every other wedge slightly; sized from
# the data so it always matches the number of rows in `grouped`.
explode = [0.0] + [0.05] * (len(grouped) - 1)
# Labels come from the same frame as the counts so they cannot drift out of order.
plt.pie(grouped['Count'], labels=grouped['type'], explode=explode, shadow=False,
        autopct='%1.0f%%', startangle=20, colors=['#48BFA0','#F9ED94','#E6543B'])
plt.title('Pie Chart')
plt.tight_layout()
plt.show()
In [11]:
# Split the catalogue by content type: dt1 holds TV shows, dt2 holds movies.
is_tv_show = dt['type'] == 'TV Show'
is_movie = dt['type'] == 'Movie'
dt1 = dt[is_tv_show]
dt2 = dt[is_movie]

# Column the following cells aggregate on.
col = 'year_added'
In [12]:
# TV shows per value of `col`, with each value's share of the total.
# rename_axis + reset_index(name=...) keeps the columns (<col>, "count") identical
# on pandas 1.x and 2.x, which name the value_counts() result differently.
vc1 = dt1[col].value_counts().rename_axis(col).reset_index(name='count')
# Vectorised share; avoids re-summing the whole column once per row.
vc1['percent'] = 100 * vc1['count'] / vc1['count'].sum()
vc1 = vc1.sort_values(col)
In [13]:
# Preview the TV show counts, ordered by the grouping column.
vc1.head()
Out[13]:
year_added count percent
9 2008.0 1 0.051046
8 2012.0 3 0.153139
7 2013.0 6 0.306279
6 2014.0 6 0.306279
5 2015.0 32 1.633486
In [14]:
# Movies per value of `col`, with each value's share of the total.
# rename_axis + reset_index(name=...) keeps the columns (<col>, "count") identical
# on pandas 1.x and 2.x, which name the value_counts() result differently.
vc2 = dt2[col].value_counts().rename_axis(col).reset_index(name='count')
# Vectorised share; avoids re-summing the whole column once per row.
vc2['percent'] = 100 * vc2['count'] / vc2['count'].sum()
vc2 = vc2.sort_values(col)
In [15]:
# Preview the movie counts, ordered by the grouping column.
vc2.head()
Out[15]:
year_added count percent
12 2008.0 1 0.023452
10 2009.0 2 0.046904
11 2010.0 1 0.023452
7 2011.0 13 0.304878
9 2012.0 4 0.093809
In [16]:
# Line chart of the per-`col` counts, one line per content type.
fig, ax = plt.subplots(figsize=(14, 8), dpi=300)

for counts, series_label, line_color in (
    (vc1, 'TV Show', '#48BFA0'),
    (vc2, 'Movie', '#E6543B'),
):
    ax.plot(counts[col], counts['count'], label=series_label, marker='o', color=line_color)

ax.set_title('Content added over the years')
ax.set_xlabel('Year')
ax.set_ylabel('Count')
ax.legend()
ax.grid(True)
fig.tight_layout()
In [17]:
col = 'release_year'

# Titles per release year for each content type, plus each year's share.
# rename_axis + reset_index(name=...) keeps the columns (<col>, "count") identical
# on pandas 1.x and 2.x, which name the value_counts() result differently.
vc1 = dt1[col].value_counts().rename_axis(col).reset_index(name='count')
vc1['percent'] = 100 * vc1['count'] / vc1['count'].sum()
vc1 = vc1.sort_values(col)

vc2 = dt2[col].value_counts().rename_axis(col).reset_index(name='count')
vc2['percent'] = 100 * vc2['count'] / vc2['count'].sum()
vc2 = vc2.sort_values(col)
In [18]:
# Side-by-side bars per `col` value: TV shows shifted right, movies shifted left.
fig = plt.figure(figsize=(14, 8), dpi=300)
ax = fig.add_subplot(111)
width = 0.3
shift = width / 1.5  # horizontal offset applied to each series

ax.bar(vc1[col] + shift, vc1['count'], width=width, color='#48BFA0', label='TV Show')
ax.bar(vc2[col] - shift, vc2['count'], width=width, color='#E6543B', label='Movie')
ax.legend()
ax.set_title('Content released over the years')
ax.set_xlabel('Year')
ax.set_ylabel('Count')
ax.grid(True)
fig.tight_layout()
In [19]:
# Plotly version of the same per-`col` comparison, one bar trace per content type.
trace1 = go.Bar(name='TV Shows', x=vc1[col], y=vc1['count'], marker={'color': '#48BFA0'})
trace2 = go.Bar(name='Movies', x=vc2[col], y=vc2['count'], marker={'color': '#E6543B'})
data = [trace1, trace2]

layout = go.Layout(
    title="Content released over the years",
    legend={'x': 0.1, 'y': 1.1, 'orientation': 'h'},
)
fig = go.Figure(data=data, layout=layout)
fig.show()
In [20]:
col = 'month_added'

# TV shows per month of addition, plus each month's share of the total.
# rename_axis + reset_index(name=...) keeps the columns (<col>, "count") identical
# on pandas 1.x and 2.x, which name the value_counts() result differently.
vc1 = dt1[col].value_counts().rename_axis(col).reset_index(name='count')
vc1['percent'] = 100 * vc1['count'] / vc1['count'].sum()
vc1 = vc1.sort_values(col)
In [21]:
# Bar chart of the TV show counts computed for `col`.
trace1 = go.Bar(
    name='TV Shows',
    x=vc1[col],
    y=vc1['count'],
    width=0.3,
    marker={'color': '#48BFA0'},
)
data = [trace1]
layout = go.Layout(
    title='Content added by Months',
    legend={'x': 0.1, 'y': 1.1, 'orientation': 'h'},
)
fig = go.Figure(data=data, layout=layout)
fig.show()
In [22]:
# Fifteen earliest-released titles among rows that have a movie duration.
print('Some Oldest movies on Netflix')
small = dt.sort_values(by='release_year')
small = small.loc[small['duration'].ne(''), :]
small[['title', 'release_year']].head(15)
Some Oldest movies on Netflix
Out[22]:
title release_year
2011 Prelude to War 1942
2013 The Battle of Midway 1942
2022 Undercover: How to Operate Behind Enemy Lines 1943
2023 Why We Fight: The Battle of Russia 1943
2026 WWII: Report from the Aleutians 1943
2017 The Memphis Belle: A Story of a\nFlying Fortress 1944
2019 The Negro Soldier 1944
2021 Tunisian Victory 1944
2012 San Pietro 1945
2009 Nazi Concentration Camps 1945
2005 Know Your Enemy - Japan 1945
2930 The Stranger 1946
2006 Let There Be Light 1946
2020 Thunderbolt 1947
2981 White Christmas 1954
In [23]:
# Fifteen earliest-released titles among rows that have a season count.
# The heading names TV shows because the filter below keeps only rows with a
# non-empty season_count.
print('Some Oldest TV shows on Netflix')
small = dt.sort_values('release_year',ascending = True)
small = small[small['season_count']!='']
small[['title','release_year']][:15]
Some Oldest movies on Netflix
Out[23]:
title release_year
4292 Pioneers: First Women Filmmakers* 1925
4079 Pioneers of African-American Cinema 1946
5981 The Twilight Zone (Original Series) 1963
5980 The Andy Griffith Show 1967
5704 Star Trek 1968
614 Monty Python's Fliegender Zirkus 1972
5669 Monty Python's Flying Circus 1974
5830 Dad's Army 1977
4273 El Chavo 1979
4369 Ninja Hattori 1981
982 Robotech 1985
6016 Saint Seiya 1986
3473 Shaka Zulu 1986
5774 Highway to Heaven 1988
369 High Risk 1988
In [24]:
# Lower-cased country name -> three-letter code used as a choropleth location.
# Keys must stay all-lowercase: lookups are done with `name.lower()`.
country_codes = {'afghanistan': 'AFG',
 'albania': 'ALB',
 'algeria': 'DZA',
 'american samoa': 'ASM',
 'andorra': 'AND',
 'angola': 'AGO',
 'anguilla': 'AIA',
 'antigua and barbuda': 'ATG',
 'argentina': 'ARG',
 'armenia': 'ARM',
 'aruba': 'ABW',
 'australia': 'AUS',
 'austria': 'AUT',
 'azerbaijan': 'AZE',
 'bahamas': 'BHS',
 'bahrain': 'BHR',
 'bangladesh': 'BGD',
 'barbados': 'BRB',
 'belarus': 'BLR',
 'belgium': 'BEL',
 'belize': 'BLZ',
 'benin': 'BEN',
 'bermuda': 'BMU',
 'bhutan': 'BTN',
 'bolivia': 'BOL',
 'bosnia and herzegovina': 'BIH',
 'botswana': 'BWA',
 'brazil': 'BRA',
 'british virgin islands': 'VGB',
 'brunei': 'BRN',
 'bulgaria': 'BGR',
 'burkina faso': 'BFA',
 'burma': 'MMR',
 'burundi': 'BDI',
 'cabo verde': 'CPV',
 'cambodia': 'KHM',
 'cameroon': 'CMR',
 'canada': 'CAN',
 'cayman islands': 'CYM',
 'central african republic': 'CAF',
 'chad': 'TCD',
 'chile': 'CHL',
 'china': 'CHN',
 'colombia': 'COL',
 'comoros': 'COM',
 'congo democratic': 'COD',
 'congo republic': 'COG',
 'cook islands': 'COK',
 'costa rica': 'CRI',
 "cote d'ivoire": 'CIV',
 'croatia': 'HRV',
 'cuba': 'CUB',
 'curacao': 'CUW',
 'cyprus': 'CYP',
 'czech republic': 'CZE',
 'denmark': 'DNK',
 'djibouti': 'DJI',
 'dominica': 'DMA',
 'dominican republic': 'DOM',
 'ecuador': 'ECU',
 'egypt': 'EGY',
 'el salvador': 'SLV',
 'equatorial guinea': 'GNQ',
 'eritrea': 'ERI',
 'estonia': 'EST',
 'ethiopia': 'ETH',
 'falkland islands': 'FLK',
 'faroe islands': 'FRO',
 'fiji': 'FJI',
 'finland': 'FIN',
 'france': 'FRA',
 'french polynesia': 'PYF',
 'gabon': 'GAB',
 'gambia, the': 'GMB',
 'georgia': 'GEO',
 'germany': 'DEU',
 'ghana': 'GHA',
 'gibraltar': 'GIB',
 'greece': 'GRC',
 'greenland': 'GRL',
 'grenada': 'GRD',
 'guam': 'GUM',
 'guatemala': 'GTM',
 'guernsey': 'GGY',
 'guinea-bissau': 'GNB',
 'guinea': 'GIN',
 'guyana': 'GUY',
 'haiti': 'HTI',
 'honduras': 'HND',
 'hong kong': 'HKG',
 'hungary': 'HUN',
 'iceland': 'ISL',
 'india': 'IND',
 'indonesia': 'IDN',
 'iran': 'IRN',
 'iraq': 'IRQ',
 'ireland': 'IRL',
 'isle of man': 'IMN',
 'israel': 'ISR',
 'italy': 'ITA',
 'jamaica': 'JAM',
 'japan': 'JPN',
 'jersey': 'JEY',
 'jordan': 'JOR',
 'kazakhstan': 'KAZ',
 'kenya': 'KEN',
 'kiribati': 'KIR',
 'north korea': 'PRK',
 'south korea': 'KOR',
 'kosovo': 'KSV',
 'kuwait': 'KWT',
 'kyrgyzstan': 'KGZ',
 'laos': 'LAO',
 'latvia': 'LVA',
 'lebanon': 'LBN',
 'lesotho': 'LSO',
 'liberia': 'LBR',
 'libya': 'LBY',
 'liechtenstein': 'LIE',
 'lithuania': 'LTU',
 'luxembourg': 'LUX',
 'macau': 'MAC',
 'macedonia': 'MKD',
 'madagascar': 'MDG',
 'malawi': 'MWI',
 'malaysia': 'MYS',
 'maldives': 'MDV',
 'mali': 'MLI',
 'malta': 'MLT',
 'marshall islands': 'MHL',
 'mauritania': 'MRT',
 'mauritius': 'MUS',
 'mexico': 'MEX',
 'micronesia': 'FSM',
 'moldova': 'MDA',
 'monaco': 'MCO',
 'mongolia': 'MNG',
 'montenegro': 'MNE',
 'morocco': 'MAR',
 'mozambique': 'MOZ',
 'namibia': 'NAM',
 'nepal': 'NPL',
 'netherlands': 'NLD',
 'new caledonia': 'NCL',
 'new zealand': 'NZL',
 'nicaragua': 'NIC',
 'nigeria': 'NGA',
 'niger': 'NER',
 'niue': 'NIU',
 'northern mariana islands': 'MNP',
 'norway': 'NOR',
 'oman': 'OMN',
 'pakistan': 'PAK',
 'palau': 'PLW',
 'panama': 'PAN',
 'papua new guinea': 'PNG',
 'paraguay': 'PRY',
 'peru': 'PER',
 'philippines': 'PHL',
 'poland': 'POL',
 'portugal': 'PRT',
 'puerto rico': 'PRI',
 'qatar': 'QAT',
 'romania': 'ROU',
 'russia': 'RUS',
 'rwanda': 'RWA',
 'saint kitts and nevis': 'KNA',
 'saint lucia': 'LCA',
 'saint martin': 'MAF',
 'saint pierre and miquelon': 'SPM',
 'saint vincent and the grenadines': 'VCT',
 'samoa': 'WSM',
 'san marino': 'SMR',
 'sao tome and principe': 'STP',
 'saudi arabia': 'SAU',
 'senegal': 'SEN',
 'serbia': 'SRB',
 'seychelles': 'SYC',
 'sierra leone': 'SLE',
 'singapore': 'SGP',
 'sint maarten': 'SXM',
 'slovakia': 'SVK',
 'slovenia': 'SVN',
 'solomon islands': 'SLB',
 'somalia': 'SOM',
 'south africa': 'ZAF',
 'south sudan': 'SSD',
 'spain': 'ESP',
 'sri lanka': 'LKA',
 'sudan': 'SDN',
 'suriname': 'SUR',
 'swaziland': 'SWZ',
 'sweden': 'SWE',
 'switzerland': 'CHE',
 'syria': 'SYR',
 'taiwan': 'TWN',
 'tajikistan': 'TJK',
 'tanzania': 'TZA',
 'thailand': 'THA',
 'timor-leste': 'TLS',
 'togo': 'TGO',
 'tonga': 'TON',
 'trinidad and tobago': 'TTO',
 'tunisia': 'TUN',
 'turkey': 'TUR',
 'turkmenistan': 'TKM',
 'tuvalu': 'TUV',
 'uganda': 'UGA',
 'ukraine': 'UKR',
 'united arab emirates': 'ARE',
 'united kingdom': 'GBR',
 'united states': 'USA',
 'uruguay': 'URY',
 'uzbekistan': 'UZB',
 'vanuatu': 'VUT',
 'venezuela': 'VEN',
 'vietnam': 'VNM',
 # NOTE(review): shares its code with 'british virgin islands' - confirm intended.
 'virgin islands': 'VGB',
 'west bank': 'WBG',
 'yemen': 'YEM',
 'zambia': 'ZMB',
 'zimbabwe': 'ZWE'}
In [25]:
## countries 
from collections import Counter
# Seventeen-step hex palette running from near-white to dark blue.
colorscale = [
    "#f7fbff",
    "#ebf3fb",
    "#deebf7",
    "#d2e3f3",
    "#c6dbef",
    "#b3d2e9",
    "#9ecae1",
    "#85bcdb",
    "#6baed6",
    "#57a0ce",
    "#4292c6",
    "#3082be",
    "#2171b5",
    "#1361a9",
    "#08519c",
    "#0b4083",
    "#08306b",
]
    
def geoplot(ddf):
    """Draw a world choropleth of title counts per country and return the counts.

    Args:
        ddf: DataFrame with a ``country`` column whose values are comma-separated
            country names; missing values are ignored.

    Returns:
        dict mapping each country name found in ``ddf['country']`` to the number
        of times it is listed. Names without an entry in ``country_codes`` are
        included here even though they cannot be drawn on the map.
    """
    country_with_code, country = {}, {}

    # Split on bare commas and strip every piece so entries with a trailing comma
    # (e.g. "United States,") or uneven spacing still count towards one country.
    names = [
        piece.strip()
        for entry in ddf['country'].dropna()
        for piece in entry.split(",")
    ]
    for name, n_titles in Counter(n for n in names if n).items():
        country[name] = n_titles
        code = country_codes.get(name.lower())
        if code is None:
            # No code known for this name: leave it off the map rather than
            # lumping every unmapped country under one empty location key.
            continue
        # Several names can share a code, so accumulate instead of overwriting.
        country_with_code[code] = country_with_code.get(code, 0) + n_titles

    data = [dict(
            type = 'choropleth',
            locations = list(country_with_code.keys()),
            z = list(country_with_code.values()),
            colorscale = [[0,"#000000"],[0.65,"#007575"],[0.75,"#00A3A3"],[0.8,"#00D1D1"],[0.9,"#00FFFF"],[0.95,"#8AFFFF"],[1,"#FFFFFF"]],
            autocolorscale = False,
            reversescale = True,
            marker = dict(
                line = dict (
                    color = 'gray',
                    width = 0.5
                ) ),
            # NOTE(review): `autotick` looks like a legacy colorbar attribute; it is
            # only accepted because validate=False skips checking - confirm it is wanted.
            colorbar = dict(
                autotick = False,
                title = ''),
          ) ]

    layout = dict(
        title = '',
        geo = dict(
            showframe = False,
            showcoastlines = False,
            projection = dict(
                # Projection type names are lowercase; with validation disabled a
                # capitalised value is not rejected, it is just not recognised.
                type = 'mercator'
            )
        )
    )

    fig = dict( data=data, layout=layout )
    iplot( fig, validate=False, filename='d3-world-map' )
    return country

# Draw the map, then chart the 25 countries with the highest title counts.
country_vals = geoplot(dt)
tabs = Counter(country_vals).most_common(25)

# Reverse the ranking so the largest count is last in the trace.
labels = [name for name, _total in reversed(tabs)]
values = [total for _name, total in reversed(tabs)]
trace1 = go.Bar(
    x=values,
    y=labels,
    orientation="h",
    name="",
    marker={"color": "#48BFA0"},
)

data = [trace1]
layout = go.Layout(
    title="Countries with most content",
    height=700,
    legend={"x": 0.1, "y": 1.1, "orientation": "h"},
)
fig = go.Figure(data=data, layout=layout)
fig.show()
In [26]:
import plotly.figure_factory as ff

# Distribution of movie runtimes with a fitted normal curve.
# to_numeric(errors='coerce') turns blank or unparsable durations into NaN, and
# those rows are dropped instead of being counted as zero-minute movies.
x1 = pd.to_numeric(dt2['duration'], errors='coerce').dropna().astype(float)
fig = ff.create_distplot([x1], ['Movie duration (min)'], bin_size = 0.7, curve_type = 'normal', colors = ['#48BFA0'])
fig.update_layout(title_text = 'Distplot with Normal Distribution')
fig.show()
In [27]:
# Preview the TV show subset (dt1) before charting its season counts.
dt1.head()
Out[27]:
show_id type title director cast country date_added release_year rating duration listed_in description year_added month_added season_count
2 70234439 TV Show Transformers Prime NaN Peter Cullen, Sumalee Montano, Frank Welker, J... United States 2018-09-08 2013 TV-Y7-FV Kids' TV With the help of three human allies, the Autob... 2018.0 9.0 1
3 80058654 TV Show Transformers: Robots in Disguise NaN Will Friedle, Darren Criss, Constance Zimmer, ... United States 2018-09-08 2016 TV-Y7 Kids' TV When a prison ship crash unleashes hundreds of... 2018.0 9.0 1
5 80163890 TV Show Apaches NaN Alberto Ammann, Eloy Azorín, Verónica Echegui,... Spain 2017-09-08 2016 TV-MA Crime TV Shows, International TV Shows, Spanis... A young journalist is forced into a life of cr... 2017.0 9.0 1
8 80117902 TV Show Fire Chasers NaN NaN United States 2017-09-08 2017 TV-MA Docuseries, Science & Nature TV As California's 2016 fire season rages, brave ... 2017.0 9.0 1
26 80244601 TV Show Castle of Stars NaN Chaiyapol Pupart, Jintanutda Lummakanon, Worra... NaN 2018-09-07 2015 TV-14 International TV Shows, Romantic TV Shows, TV ... As four couples with different lifestyles go t... 2018.0 9.0 1
In [28]:
col = 'season_count'

# TV shows per number of seasons, plus each value's share of the total.
# rename_axis + reset_index(name=...) keeps the columns (<col>, "count") identical
# on pandas 1.x and 2.x, which name the value_counts() result differently.
vc1 = dt1[col].value_counts().rename_axis(col).reset_index(name='count')
vc1['percent'] = 100 * vc1['count'] / vc1['count'].sum()
# season_count holds digit strings; convert them so "10" sorts after "2"
# (blank or unparsable values become NaN and sort last).
vc1[col] = pd.to_numeric(vc1[col], errors='coerce')
vc1 = vc1.sort_values(col)

trace1 = go.Bar(x = vc1[col], y = vc1['count'],name='TV Shows',marker = dict(color = '#48BFA0'))
data = [trace1]

layout = go.Layout(title = 'Seasons',legend=dict(x=0.1,y=1.1,orientation = 'h'))
fig = go.Figure(data,layout = layout)
fig.show()
In [29]:
col = 'rating'

# Titles per rating for each content type, ordered from rarest to most common.
# rename_axis + reset_index(name=...) keeps the columns (<col>, "count") identical
# on pandas 1.x and 2.x, which name the value_counts() result differently.
vc1 = dt1[col].value_counts().rename_axis(col).reset_index(name='count')
vc1 = vc1.sort_values('count')

vc2 = dt2[col].value_counts().rename_axis(col).reset_index(name='count')
vc2 = vc2.sort_values('count')
In [30]:
# Ratings comparison, one bar trace per content type.
trace1 = go.Bar(x = vc1[col],y = vc1['count'],name = 'TV Shows', marker = dict(color = '#48BFA0'))
trace2 = go.Bar(x = vc2[col],y = vc2['count'],name = 'Movies'  , marker = dict(color = '#F9ED94'))
data = [trace1,trace2]

layout = go.Layout(title = 'Ratings' , legend= dict(x = 0.1, y= 1.1, orientation = 'h'))

# Bind the figure to `fig`, not `plt`: `plt` is the matplotlib.pyplot alias and
# rebinding it would break every matplotlib cell that is run afterwards.
fig = go.Figure(data,layout = layout)
fig.show()
In [31]:
col = "listed_in"
# Each title lists several comma-separated categories; flatten them into one
# list (skipping missing values, which str.join cannot handle) and rank them.
categories = ", ".join(dt[col].dropna()).split(", ")
counter_list = Counter(categories).most_common(50)
# Reverse the ranking so the most common category is last in the trace.
labels = [name for name, _ in counter_list][::-1]
values = [count for _, count in counter_list][::-1]
# The counts cover the whole catalogue (dt), so the trace is not labelled "TV Shows".
trace1 = go.Bar(y=labels, x=values, orientation="h", name="All titles", marker=dict(color="#48BFA0"),width = 0.5)

data = [trace1]
# Title describes this chart (category ranking).
layout = go.Layout(title="Top 50 categories", legend=dict(x=0.1, y=1.1, orientation="h"))
fig = go.Figure(data, layout=layout)
fig.show()
In [39]:
def country_trace(country, flag = "movie"):
    """Build a horizontal bar trace of the most frequently credited cast members.

    Args:
        country: Country name; a title is selected when this name occurs
            (case-insensitively) anywhere in its ``country`` field.
        flag: ``"movie"`` keeps titles with a non-empty ``duration``; any other
            value keeps titles with a non-empty ``season_count``.

    Returns:
        ``go.Bar`` holding up to 25 cast members and how often each is credited,
        ordered so the most frequent name is last in the trace.
    """
    # Select rows with a local mask instead of writing a scratch column into the
    # shared `dt` frame, so calling this function leaves `dt` unchanged.
    wanted = country.lower()
    from_country = dt['country'].fillna("").apply(lambda text: wanted in text.lower())
    small = dt[from_country]

    kind_column = "duration" if flag == "movie" else "season_count"
    small = small[small[kind_column] != ""]

    # Drop blank names *before* ranking so they cannot take one of the 25 slots.
    cast = [name for name in ", ".join(small['cast'].fillna("")).split(", ") if name != ""]
    tags = Counter(cast).most_common(25)

    # Trailing spaces pad the label text away from the axis.
    labels = [name + "  " for name, _ in tags]
    values = [count for _, count in tags]
    trace = go.Bar(y=labels[::-1], x=values[::-1], orientation="h", name="", marker=dict(color="#48BFA0"))
    return trace

from plotly.subplots import make_subplots
# One movie-cast chart per country on a 2x5 grid. The blank titles belong to
# grid columns 2 and 4, which receive no trace (presumably kept as spacing).
titles = ["United States", "","India","", "United Kingdom", "Canada","", "Spain","", "Japan"]
traces = [country_trace(title) for title in titles if title != ""]

fig = make_subplots(rows=2, cols=5, subplot_titles=titles)
grid_cells = [(1, 1), (1, 3), (1, 5), (2, 1), (2, 3), (2, 5)]
for trace, (row, column) in zip(traces, grid_cells):
    fig.add_trace(trace, row, column)

fig.update_layout(height=1200, showlegend=False)
fig.show()
In [40]:
# One TV-show-cast chart per country on a 2x5 grid. The blank titles belong to
# grid columns 2 and 4, which receive no trace (presumably kept as spacing).
titles = ["United States", "","India","", "United Kingdom", "Canada","", "Spain","", "Japan"]
traces = [country_trace(title, flag='TV Shows') for title in titles if title != ""]

fig = make_subplots(rows=2, cols=5, subplot_titles=titles)
grid_cells = [(1, 1), (1, 3), (1, 5), (2, 1), (2, 3), (2, 5)]
for trace, (row, column) in zip(traces, grid_cells):
    fig.add_trace(trace, row, column)

fig.update_layout(height=1200, showlegend=False)
fig.show()